# https://github.com/USGS-R/dataRetrieval
# general R packages to install if you dont have them
# remove the "#" if you need to run the installations
# install.packages("devtools") # essential in installing other thigs
# install.packages("tidyverse") # installs a lot of things and ggplot
# install.packages("scales") # allows great scale formatting on ggplot
# install.packages("lubridate") # makes working with dates easier
# install.packages("janitor") # clean names of columns and other things
# install.packages("readxl") # read in excel files
# install.packages("plotly") # interactive plots
# devtools::install_github("thomasp85/patchwork") # multiple plots
# specific to this module
# install.packages("dataRetrieval") # USGS Data Retreiveal Method
Libraires
library(skimr)
Attaching package: ‘skimr’
The following object is masked from ‘package:knitr’:
kable
The following object is masked from ‘package:stats’:
filter
Read in file for …
# The site is Neversink and the site number is 01435000
# USGS 01435000 NEVERSINK RIVER NEAR CLARYVILLE NY
siteNo_monthly <- "01435000"
pCode_monthly <- "00060"
start.date_monthly <- "1937-11"
end.date_monthly <- "2019-08"
neversink_monthly.df <- readNWISstat(siteNumbers = siteNo_monthly,
parameterCd = pCode_monthly,
startDate = start.date_monthly,
endDate = end.date_monthly,
statReportType="Monthly",
statType="mean")
Please be aware the NWIS data service feeding this function is in BETA.
Data formatting could be changed at any time, and is not guaranteed
# rename the columns
neversink_monthly.df <- renameNWISColumns(neversink_monthly.df)
names(neversink_monthly.df)
[1] "agency_cd" "site_no" "parameter_cd" "ts_id" "loc_web_ds"
[6] "year_nu" "month_nu" "mean_va"
neversink_monthly.df <- neversink_monthly.df %>%
mutate(
date = ymd(paste(year_nu, month_nu, "01", sep="-"))
)
all.plot <- ggplot(neversink_monthly.df, aes(date, mean_va)) +
geom_point() +
geom_line()
all.plot
feb.plot <- neversink_monthly.df %>%
filter(month_nu == 2) %>%
ggplot(aes(date, mean_va)) +
geom_point() +
geom_line() +
labs(y="Mean Monthly Discharge m^3/sec", x="Date")
feb.plot
NA
We can make it interactive
ggplotly(feb.plot)
August data
aug.plot <- neversink_monthly.df %>%
filter(month_nu == 8) %>%
ggplot(aes(date, mean_va)) +
geom_point() +
geom_line() +
labs(y="Mean Monthly Discharge m^3/sec", x="Date")
aug.plot
feb_aug.plot <- feb.plot +
aug.plot +
plot_layout(ncol =1)
feb_aug.plot
feb_aug.plot <- feb.plot + geom_smooth(method="lm") +
aug.plot + geom_smooth(method="lm") +
plot_layout(ncol =1)
feb_aug.plot
summary(feb.model)
Call:
lm(formula = mean_va ~ date, data = .)
Residuals:
Min 1Q Median 3Q Max
-184.1 -114.4 -36.8 76.0 697.1
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.944e+02 5.148e+00 37.76 <2e-16 ***
date 8.601e-04 5.660e-04 1.52 0.129
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 147.5 on 943 degrees of freedom
Multiple R-squared: 0.002443, Adjusted R-squared: 0.001385
F-statistic: 2.309 on 1 and 943 DF, p-value: 0.1289
summary(aug.model)
Call:
lm(formula = mean_va ~ date, data = .)
Residuals:
Min 1Q Median 3Q Max
-184.1 -114.4 -36.8 76.0 697.1
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.944e+02 5.148e+00 37.76 <2e-16 ***
date 8.601e-04 5.660e-04 1.52 0.129
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 147.5 on 943 degrees of freedom
Multiple R-squared: 0.002443, Adjusted R-squared: 0.001385
F-statistic: 2.309 on 1 and 943 DF, p-value: 0.1289
Use this site to find a new location https://maps.waterdata.usgs.gov/mapper/index.html
# The site is Neversink and the site number is 01435000
# USGS SAGAVANIRKTOK R NR PUMP STA 3 AK
siteNo_monthly <- "15908000"
pCode_monthly <- "00060"
start.date_monthly <- "1900-01"
end.date_monthly <- "2019-08"
sag_monthly.df <- readNWISstat(siteNumbers = siteNo_monthly,
parameterCd = pCode_monthly,
startDate = start.date_monthly,
endDate = end.date_monthly,
statReportType="Monthly",
statType="mean")
Please be aware the NWIS data service feeding this function is in BETA.
Data formatting could be changed at any time, and is not guaranteed
# rename the columns
sag_monthly.df <- renameNWISColumns(sag_monthly.df)
names(sag_monthly.df)
[1] "agency_cd" "site_no" "parameter_cd" "ts_id" "loc_web_ds"
[6] "year_nu" "month_nu" "mean_va"
sag_monthly.df <- sag_monthly.df %>%
mutate(
date = ymd(paste(year_nu, month_nu, "01", sep="-"))
)
feb_new.plot <- sag_monthly.df %>%
filter(month_nu == 2) %>%
ggplot(aes(date, mean_va)) +
geom_point() +
geom_line() +
labs(y="Mean Monthly Discharge m^3/sec", x="Date")
feb_new.plot
NA
August data
aug_new.plot <- sag_monthly.df %>%
filter(month_nu == 8) %>%
ggplot(aes(date, mean_va)) +
geom_point() +
geom_line() +
labs(y="Mean Monthly Discharge m^3/sec", x="Date")
aug_new.plot
feb_aug_new.plot <- feb_new.plot +
aug_new.plot +
plot_layout(ncol =1)
feb_aug_new.plot
siteNo_peak <- "01435000"
pCode_peak <- "00060"
start.date_peak <- "1850-01-01"
end.date_peak <- "2019-08-02"
neversink_peak.df <- readNWISpeak(siteNumbers = siteNo_peak)
# rename the columns
neversink_peak.df <- renameNWISColumns(neversink_peak.df)
names(neversink_peak.df)
peak_flow.plot <- neversink_peak.df %>%
ggplot(aes(peak_dt , peak_va)) +
geom_point() +
geom_line()
ggplotly(peak_flow.plot)
skim(neversink_peak.df)
Skim summary statistics
n obs: 79
n variables: 13
── Variable type:character ────────────────────────────────────────────────────────────────────────────────────────────────────
variable missing complete n min max empty n_unique
ag_gage_ht_cd 76 3 79 1 3 0 2
ag_tm 79 0 79 NA NA 0 0
agency_cd 0 79 79 4 4 0 1
gage_ht_cd 37 42 79 1 3 0 4
peak_cd 73 6 79 1 1 0 3
peak_tm 79 0 79 NA NA 0 0
site_no 0 79 79 8 8 0 1
── Variable type:Date ─────────────────────────────────────────────────────────────────────────────────────────────────────────
variable missing complete n min max median n_unique
ag_dt 76 3 79 1948-02-14 1976-01-27 1960-04-04 3
peak_dt 0 79 79 1938-07-22 2017-02-25 1977-10-01 79
── Variable type:numeric ──────────────────────────────────────────────────────────────────────────────────────────────────────
variable missing complete n mean sd p0 p25 p50 p75 p100 hist
ag_gage_ht 76 3 79 6.33 0.59 5.99 6 6 6.5 7.01 ▇▁▁▁▁▁▁▃
gage_ht 0 79 79 8.54 2.99 3.73 6.02 8.51 10.95 14.64 ▆▇▅▂▆▆▅▂
peak_va 0 79 79 7189.24 4497.07 1400 4215 6080 9090 23400 ▆▇▅▃▁▁▁▁
year_last_pk 78 1 79 1938 NA 1938 1938 1938 1938 1938 ▁▁▁▇▁▁▁▁
this is one way to do it but is not the best
neversink_floodfreq.df <- neversink_peak.df %>%
select(agency_cd, site_no, peak_dt, peak_va, gage_ht) %>%
arrange(desc(peak_va)) %>%
mutate(
year = year(peak_dt)
) %>%
group_by(year) %>%
filter(peak_va == max(peak_va)) %>%
mutate(rank = dense_rank(desc(peak_va)))
neversink_floodfreq.df <- neversink_floodfreq.df %>%
ungroup() %>%
mutate(total_n = n()) %>%
rowwise() %>%
mutate(probability = rank/(total_n+1))